The goal of this script is to generate a Seurat object for sample F59.
LogNormalize, then doublets
detection using scran hybrid and scDblFinder
method, and doublet cells removalLogNormalize, for only the remaining
cellsPCAtSNE and UMAPlibrary(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
out_dir = "."
We load the parameters :
sample_name = params$sample_name # "F31W"
# sample_name = "F31W"
Input count matrix is there :
count_matrix_dir = paste0(out_dir, "/input/", sample_name, "/raw_feature_bc_matrix")
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells
## 13 13 9
## macrophages B cells cuticle
## 10 16 15
## cortex medulla IRS
## 16 10 16
## proliferative ORS IFE basal
## 20 15 16
## IFE granular spinous HF-SCs melanocytes
## 17 17 10
## sebocytes
## 8
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $ORS
## [1] "KRT16" "KRT6C"
##
## $`IFE basal`
## [1] "KRT15" "SAA1"
##
## $`IFE granular spinous`
## [1] "SPINK5" "KRT1"
##
## $`HF-SCs`
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
We load metadata for this sample :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/wu_sample_info.rds"))
sample_info %>%
dplyr::filter(project_name == sample_name)
## project_name sample_type sample_identifier platform gender location
## 1 F59 HD Wu_HD_4 10X F hair scalp
## laboratory color
## 1 Wu darkviolet
These is a parameter for different functions :
cl = aquarius::create_parallel_instance(nthreads = 3L)
cut_log_nCount_RNA = 6
cut_nFeature_RNA = 500
cut_percent.mt = 20
cut_percent.rb = 50
In this section, we load the raw count matrix. Then, we applied an empty droplets filtering.
sobj = aquarius::load_sc_data(data_path = count_matrix_dir,
sample_name = sample_name,
my_seed = 1337L)
## [1] 27955 737280
## [1] 35415791
## [1] 27955 3299
## [1] 32443090
## [1] 0.9160628
sobj
## An object of class Seurat
## 27955 features across 3299 samples within 1 assay
## Active assay: RNA (27955 features, 0 variable features)
(Time to run : 96.16 s)
We set an upper bound to 20,000 cells. Remaining cells correspond to ambient RNA.
if (ncol(sobj) > 20000) {
sobj$nb_UMI = Seurat::GetAssayData(sobj, assay = "RNA", slot = "counts") %>%
Matrix::colSums()
thresh_20k = sort(sobj$nb_UMI, decreasing = TRUE)[20000]
sobj = subset(sobj, nb_UMI >= thresh_20k)
sobj$nb_UMI = NULL
sobj
}
In genes metadata, we add the Ensembl ID. The
sobj@assays$RNA@meta.features dataframe contains three
information :
rownames : gene names stored as the dimnames of the
count matrix. Duplicated gene names will have a .1 at the
end of their nameEnsembl_ID : EnsemblID, as stored in the
features.tsv.gz filegene_name : gene_name, as stored in the
features.tsv.gz file. Duplicated gene names will have the
same name.features_df = read.csv(paste0(count_matrix_dir, "/features.tsv.gz"), sep = "\t", header = 0)
features_df = features_df[, c(1:2)]
colnames(features_df) = c("Ensembl_ID", "gene_name")
rownames(features_df) = rownames(sobj) # mandatory for Seurat::FindVariableFeatures
sobj@assays$RNA@meta.features = features_df
rm(features_df)
head(sobj@assays$RNA@meta.features)
## Ensembl_ID gene_name
## MIR1302-2HG ENSG00000243485 MIR1302-2HG
## FAM138A ENSG00000237613 FAM138A
## OR4F5 ENSG00000186092 OR4F5
## AL627309.1 ENSG00000238009 AL627309.1
## AL627309.3 ENSG00000239945 AL627309.3
## AL627309.4 ENSG00000241599 AL627309.4
We add the same columns as in metadata :
row_oi = (sample_info$project_name == sample_name)
sobj$project_name = sample_name
sobj$sample_identifier = sample_info[row_oi, "sample_identifier"]
sobj$sample_type = sample_info[row_oi, "sample_type"]
sobj$location = sample_info[row_oi, "location"]
sobj$laboratory = sample_info[row_oi, "laboratory"]
colnames(sobj@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "log_nCount_RNA" "project_name" "sample_identifier"
## [7] "sample_type" "location" "laboratory"
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 3299 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
We generate a tSNE to visualize cells before filtering.
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj
## An object of class Seurat
## 27955 features across 3299 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We annotate cells for cell type using
Seurat::AddModuleScore function.
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells
## 4 1 22
## macrophages B cells cuticle
## 16 4 380
## cortex medulla IRS
## 418 56 200
## proliferative ORS IFE basal
## 232 830 250
## IFE granular spinous HF-SCs melanocytes
## 539 249 28
## sebocytes
## 70
(Time to run : 18.74 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", "MSX2", "KRT16",
unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase using Seurat and
cyclone.
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 2518 270 511
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 1625 130 332
## G2M 296 101 71
## S 597 39 108
(Time to run : 293.03 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
In this section, we look at the number of genes expressed by each cell, the number of UMI, the percentage of mitochondrial genes expressed, and the percentage of ribosomal genes expressed. Then, without taking into account the cells expressing low number of genes or have low number of UMI, we identify doublet cells.
We compute four quality metrics :
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^MT", col.name = "percent.mt")
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^RP[L|S][0-9]*$", col.name = "percent.rb")
head(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## AAACCTGAGGATGCGT-1 F59 14410 2913 9.575678
## AAACCTGGTGACCAAG-1 F59 7950 1989 8.980927
## AAACCTGGTTCAACCA-1 F59 16162 3468 9.690418
## AAACGGGAGATCGGGT-1 F59 7871 2305 8.970940
## AAACGGGAGATCTGAA-1 F59 4669 1515 8.448700
## AAACGGGAGTCCGTAT-1 F59 13462 3379 9.507626
## project_name sample_identifier sample_type location
## AAACCTGAGGATGCGT-1 F59 Wu_HD_4 HD hair scalp
## AAACCTGGTGACCAAG-1 F59 Wu_HD_4 HD hair scalp
## AAACCTGGTTCAACCA-1 F59 Wu_HD_4 HD hair scalp
## AAACGGGAGATCGGGT-1 F59 Wu_HD_4 HD hair scalp
## AAACGGGAGATCTGAA-1 F59 Wu_HD_4 HD hair scalp
## AAACGGGAGTCCGTAT-1 F59 Wu_HD_4 HD hair scalp
## laboratory score_CD4_T_cells score_CD8_T_cells
## AAACCTGAGGATGCGT-1 Wu -0.013337771 -0.003414222
## AAACCTGGTGACCAAG-1 Wu -0.006583817 -0.003956688
## AAACCTGGTTCAACCA-1 Wu -0.010902235 -0.002339977
## AAACGGGAGATCGGGT-1 Wu -0.011628382 -0.003015696
## AAACGGGAGATCTGAA-1 Wu -0.004122051 -0.004128726
## AAACGGGAGTCCGTAT-1 Wu -0.009879746 -0.001349420
## score_Langerhans_cells score_macrophages score_B_cells
## AAACCTGAGGATGCGT-1 -0.02354809 -0.01396873 -0.0013584291
## AAACCTGGTGACCAAG-1 -0.02302573 -0.01469304 -0.0010495080
## AAACCTGGTTCAACCA-1 -0.02571827 -0.01488748 -0.0021723687
## AAACGGGAGATCGGGT-1 -0.02690270 -0.01885354 -0.0021133870
## AAACGGGAGATCTGAA-1 -0.01270098 -0.01123569 -0.0007376234
## AAACGGGAGTCCGTAT-1 -0.03063849 -0.02044481 -0.0017896623
## score_cuticle score_cortex score_medulla score_IRS
## AAACCTGAGGATGCGT-1 -0.02561077 0.31941047 0.09039984 0.9705626
## AAACCTGGTGACCAAG-1 -0.17396726 -0.25388817 -0.12595660 -0.1868182
## AAACCTGGTTCAACCA-1 0.24520269 -0.03920086 0.09422159 1.1160567
## AAACGGGAGATCGGGT-1 0.03420148 -0.18123253 0.22287679 -0.3038485
## AAACGGGAGATCTGAA-1 -0.28817233 -0.03190088 -0.20260281 -0.2793889
## AAACGGGAGTCCGTAT-1 -0.31640217 -0.36047544 -0.20290521 -0.1511563
## score_proliferative score_ORS score_IFE_basal
## AAACCTGAGGATGCGT-1 -0.03895746 0.053875673 -0.43615401
## AAACCTGGTGACCAAG-1 0.26706536 0.004559663 0.21790947
## AAACCTGGTTCAACCA-1 -0.06121126 -0.460322764 -0.46029075
## AAACGGGAGATCGGGT-1 0.12048044 -0.665390170 -0.17238269
## AAACGGGAGATCTGAA-1 -0.07122020 -0.503366732 0.07691354
## AAACGGGAGTCCGTAT-1 -0.11982548 0.046164320 -0.12410580
## score_IFE_granular_spinous score_HF-SCs score_melanocytes
## AAACCTGAGGATGCGT-1 -0.07511191 -0.08615257 0.01490172
## AAACCTGGTGACCAAG-1 0.48937919 -0.10461540 -0.08162361
## AAACCTGGTTCAACCA-1 -0.14457478 -0.11362769 -0.07364392
## AAACGGGAGATCGGGT-1 -0.42814050 -0.12251578 -0.09672922
## AAACGGGAGATCTGAA-1 -0.11059107 0.09961177 -0.07185430
## AAACGGGAGTCCGTAT-1 0.84949537 0.01986490 -0.06927744
## score_sebocytes cell_type Seurat.Phase
## AAACCTGAGGATGCGT-1 -0.09186069 IRS S
## AAACCTGGTGACCAAG-1 0.16404161 IFE granular spinous S
## AAACCTGGTTCAACCA-1 -0.15261283 IRS G1
## AAACGGGAGATCGGGT-1 -0.03417658 medulla S
## AAACGGGAGATCTGAA-1 0.46282530 sebocytes G1
## AAACGGGAGTCCGTAT-1 0.02725068 IFE granular spinous G1
## cyclone.Phase percent.mt percent.rb
## AAACCTGAGGATGCGT-1 G1 1.339348 34.17071
## AAACCTGGTGACCAAG-1 G1 5.773585 28.83019
## AAACCTGGTTCAACCA-1 G1 1.850019 31.15951
## AAACGGGAGATCGGGT-1 S 5.386863 27.82366
## AAACGGGAGATCTGAA-1 S 5.054616 30.13493
## AAACGGGAGTCCGTAT-1 G1 8.624276 19.35077
We get the cell barcodes for the failing cells :
fail_percent.mt = sobj@meta.data %>% dplyr::filter(percent.mt > cut_percent.mt) %>% rownames()
fail_percent.rb = sobj@meta.data %>% dplyr::filter(percent.rb > cut_percent.rb) %>% rownames()
fail_log_nCount_RNA = sobj@meta.data %>% dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA) %>% rownames()
fail_nFeature_RNA = sobj@meta.data %>% dplyr::filter(nFeature_RNA < cut_nFeature_RNA) %>% rownames()
Without taking into account the low UMI and low number of features cells, we identify doublets.
fsobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA)))
fsobj
## An object of class Seurat
## 27955 features across 2884 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
On this filtered dataset, we apply doublet cells detection. Just before, we run the normalization, taking into account only the remaining cells.
fsobj = Seurat::NormalizeData(fsobj,
normalization.method = "LogNormalize",
assay = "RNA")
fsobj = Seurat::FindVariableFeatures(fsobj,
assay = "RNA",
nfeatures = 3000)
fsobj
## An object of class Seurat
## 27955 features across 2884 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We identify doublet cells :
fsobj = aquarius::find_doublets(sobj = fsobj,
BPPARAM = cl)
## [1] 27955 2884
##
## FALSE TRUE
## 2611 273
## [15:52:31] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
##
## FALSE TRUE
## 2611 273
##
## FALSE TRUE
## 2478 406
fail_doublets_consensus = Seurat::WhichCells(fsobj, expression = doublets_consensus.class)
fail_doublets_scDblFinder = Seurat::WhichCells(fsobj, expression = scDblFinder.class)
fail_doublets_hybrid = Seurat::WhichCells(fsobj, expression = hybrid_score.class)
(Time to run : 95.1 s)
We add the information in the non filtered Seurat object :
sobj$doublets_consensus.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_consensus ~ TRUE,
!(colnames(sobj) %in% fail_doublets_consensus) ~ FALSE)
sobj$scDblFinder.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_scDblFinder ~ TRUE,
!(colnames(sobj) %in% fail_doublets_scDblFinder) ~ FALSE)
sobj$hybrid_score.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_hybrid ~ TRUE,
!(colnames(sobj) %in% fail_doublets_hybrid) ~ FALSE)
We can visualize the 4 cells quality with a Venn diagram :
n_filtered = c(fail_percent.mt, fail_percent.rb, fail_log_nCount_RNA, fail_nFeature_RNA) %>%
unique() %>% length()
percent_filtered = round(100*(n_filtered/ncol(sobj)), 2)
ggvenn::ggvenn(list(percent.mt = fail_percent.mt,
percent.rb = fail_percent.rb,
log_nCount_RNA = fail_log_nCount_RNA,
nFeature_RNA = fail_nFeature_RNA),
fill_color = c("#0073C2FF", "#EFC000FF", "orange", "pink"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Filtered out cells",
subtitle = paste0(n_filtered, " cells (", percent_filtered, " % of all cells)")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of UMI, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "log_nCount_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_log_nCount_RNA)
Seurat::VlnPlot(sobj, features = "log_nCount_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_log_nCount_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_log_nCount_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "log_nCount_RNA",
subtitle = paste0(length(fail_log_nCount_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of features, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "nFeature_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_nFeature_RNA)
Seurat::VlnPlot(sobj, features = "nFeature_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_nFeature_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_nFeature_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "nFeature_RNA",
subtitle = paste0(length(fail_nFeature_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for mitochondrial gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.mt",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.mt)
Seurat::VlnPlot(sobj, features = "percent.mt", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.mt, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.mt,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.mt",
subtitle = paste0(length(fail_percent.mt), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for ribosomal gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.rb",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.rb)
Seurat::VlnPlot(sobj, features = "percent.rb", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.rb, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.rb,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.rb",
subtitle = paste0(length(fail_percent.rb), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
We would like to see if the number of feature expressed by cell, and
the number of UMI is correlated with the cell type, the percentage of
mitochondrial and ribosomal gene expressed, and the doublet status. We
build the log_nCount_RNA by nFeature_RNA
figure, where cells (dots) are colored by these different metrics.
This is the figure, colored by cell type :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "cell_type",
col_colors = unname(color_markers),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of mitochondrial genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.mt",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of ribosomal genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.rb",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(doublets_consensus.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "doublets_consensus.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(scDblFinder.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "scDblFinder.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(hybrid_score.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "hybrid_score.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
Do filtered cells belong to a particular cell type ?
sobj$all_cells = TRUE
plot_list = list()
## All cells
df = sobj@meta.data
if (nrow(df) == 0) {
plot_list[[1]] = ggplot()
} else {
plot_list[[1]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "All cells",
subtitle = paste(nrow(df), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## Doublets consensus
df = sobj@meta.data %>%
dplyr::filter(doublets_consensus.class)
if (nrow(df) == 0) {
plot_list[[2]] = ggplot()
} else {
plot_list[[2]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "doublets_consensus.class",
subtitle = paste(sum(sobj$doublets_consensus.class, na.rm = TRUE), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.mt
df = sobj@meta.data %>%
dplyr::filter(percent.mt > cut_percent.mt)
if (nrow(df) == 0) {
plot_list[[3]] = ggplot()
} else {
plot_list[[3]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.mt >", cut_percent.mt),
subtitle = paste(length(fail_percent.mt), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.rb
df = sobj@meta.data %>%
dplyr::filter(percent.rb > cut_percent.rb)
if (nrow(df) == 0) {
plot_list[[4]] = ggplot()
} else {
plot_list[[4]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.rb >", cut_percent.rb),
subtitle = paste(length(fail_percent.rb), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## log_nCount_RNA
df = sobj@meta.data %>%
dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA)
if (nrow(df) == 0) {
plot_list[[5]] = ggplot()
} else {
plot_list[[5]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("log_nCount_RNA <", round(cut_log_nCount_RNA, 2)),
subtitle = paste(length(fail_log_nCount_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## nFeature_RNA
df = sobj@meta.data %>%
dplyr::filter(nFeature_RNA < cut_nFeature_RNA)
if (nrow(df) == 0) {
plot_list[[6]] = ggplot()
} else {
plot_list[[6]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("nFeature_RNA <", round(cut_nFeature_RNA, 2)),
subtitle = paste(length(fail_nFeature_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We can compare doublet detection methods with a Venn diagram :
ggvenn::ggvenn(list(hybrid = fail_doublets_hybrid,
scDblFinder = fail_doublets_scDblFinder),
fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::ggtitle(label = "Doublet cells") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We visualize cells annotation for doublets :
plot_list = list()
# scDblFinder.class
sobj$fail = ifelse(sobj$scDblFinder.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "scDblFinder.class",
subtitle = paste0(sum(sobj$scDblFinder.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# hybrid_score.class
sobj$fail = ifelse(sobj$hybrid_score.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "hybrid_score.class",
subtitle = paste0(sum(sobj$hybrid_score.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
sobj$fail = NULL
# Plot
patchwork::wrap_plots(plot_list, nrow = 1)
We could save this object before filtering (remove
eval = FALSE) :
saveRDS(sobj, paste0(out_dir, "/datasets/", sample_name, "_sobj_unfiltered.rds"))
We remove :
sobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA,
fail_percent.mt, fail_percent.rb,
fail_doublets_consensus)))
sobj
## An object of class Seurat
## 27955 features across 2445 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 2445 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We perform a PCA :
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE and a UMAP with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We annotate cells for cell type, with the new normalized expression matrix :
score_columns = grep(x = colnames(sobj@meta.data), pattern = "^score", value = TRUE)
sobj@meta.data[, score_columns] = NULL
sobj$cell_type = NULL
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells
## 2 1 18
## macrophages B cells cuticle
## 13 2 301
## cortex medulla IRS
## 333 39 157
## proliferative ORS IFE basal
## 168 587 212
## IFE granular spinous HF-SCs melanocytes
## 379 172 16
## sebocytes
## 45
(Time to run : 13.35 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = "RNA_pca_20_tsne") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase :
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 1987 122 336
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 1316 53 225
## G2M 242 48 43
## S 429 21 68
(Time to run : 229.73 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
We make a highly resolutive clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = "RNA_pca", dims = c(1:ndims))
sobj = Seurat::FindClusters(sobj, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2445
## Number of edges: 75176
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7941
## Number of communities: 23
## Elapsed time: 0 seconds
table(sobj$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 241 213 195 179 178 159 147 142 133 126 115 95 89 75 61 54 45 43 39 35
## 20 21 22
## 31 27 23
We can visualize the cell type :
tsne = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_tsne"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_umap"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from Seurat :
tsne = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from cyclone :
tsne = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize the clustering :
tsne = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize all cell types markers on the tSNE :
markers = dotplot_markers %>% unlist() %>% unname()
markers = markers[markers %in% rownames(sobj)]
plot_list = lapply(markers,
FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::labs(title = one_gene) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
We save the annotated and filtered Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/datasets/", sample_name, "_sobj_filtered.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] ggvenn_0.1.9 here_1.0.1
## [59] TInGa_0.0.0.9000 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0